*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do file assigns grid identifiers to our big data establishments

* Build the list of CBSA identifiers taken from the Global Cities overlap sheet
	import excel using "$data_USMETROS/Raw Numeric Data/METRO LIST/METRO_OVERLAP_GLOBAL_US.xlsx", firstrow clear
	* Keep one row per value of Metro (original note: cases where multiple entries form one official cbsa)
	duplicates drop Metro, force
	* Only the identifier is saved; it is renamed to cbsafp, the key used in the later 1:1 merge
	keep Metroidentifier
	rename Metroidentifier cbsafp
	save "$temp/cbsafp_US_POIs", replace
 
* Restrict the US MSA list to metros that also appear in the cleaned Global Cities list
	import delimited using "$data_USMETROS/Raw Numeric Data/METRO LIST/METROS.csv", clear
	merge 1:1 cbsafp using "$temp/cbsafp_US_POIs"
	keep if _merge == 3 // matched observations only, i.e. cities present in both data sets

* Collect the CBSA identifiers for which grids are loaded and merged with primepoints
	levelsof cbsafp, local(USMETROIDS) // distinct cbsafp values are stored in local macro USMETROIDS

* For every CBSA identifier in the list, pair grid rows with big data establishments
* and write one temporary file per CBSA
	foreach USmid of local USMETROIDS {
		display "...assigning big data establishments to grid cells for CBSA: `USmid'"
		quietly {
			* Start from this CBSA's grid file
			clear
			import delimited using "$data_USMETROS/GIS Data/US METRO GRIDS/GRID_`USmid'_final"
			* Join each grid row to the establishments whose latitude lies between min_y and max_y
			rangejoin latitude min_y max_y using "$data_125cities/CLUSTERING/RAW/primepoints_includingHQs.dta"
			* Keep pairs where the point lies strictly inside the bounds in both dimensions
			* (points exactly on a bound are dropped, as are rows with missing coordinates)
			keep if (min_y < latitude) & (latitude < max_y) ///
				& (min_x < longitude) & (longitude < max_x)
			* Tag the observations with the CBSA identifier of the current iteration
			generate cbsa = `USmid'
			save "$temp/PP_`USmid'.dta", replace
		}
	}

* Stack the per-CBSA files into a single data set of big data establishments
	clear
	foreach USmid of local USMETROIDS {
		append using "$temp/PP_`USmid'"
	}

* Rename the establishment attributes and drop every variable that is not needed
	rename global_com GlobalYesNo
	rename type_globa CompanyType
	* NOTE(review): cbsafp must already exist in the appended files for this keep to succeed;
	* the cbsa variable generated in the per-CBSA loop is not in the list and is dropped here
	keep cbsafp cellid_unique latitude longitude CompanyType GlobalYesNo

* Create REFINED establishment type variable (numeric, replaces the string CompanyType)
*   codes 1-7  : the seven base types below, in list order, where GlobalYesNo==0
*   codes 8-14 : the same types, same order, where GlobalYesNo==1
*   code 15    : "TS HQ" where GlobalYesNo==1
*   any other combination is left missing
	local base_types `" "Accounting Firm" "Central Bank" "Consultancy" "Insurance" "Investment Bank" "Law Firm" "Stock Exchange" "'
	gen type_code = .
	forvalues g = 0/1 {
		* The numbering restarts at 7*g so that the GlobalYesNo==1 types follow the GlobalYesNo==0 types
		local code = 7 * `g'
		foreach t of local base_types {
			local ++code
			replace type_code = `code' if CompanyType == "`t'" & GlobalYesNo == `g'
		}
	}
	replace type_code = 15 if CompanyType == "TS HQ" & GlobalYesNo == 1
	* Swap the string variable for its numeric recode under the original name
	drop CompanyType
	rename type_code CompanyType

* Label variable
	label define mvalues ///
		1 "Accounting Firm" ///
		2 "Central Bank" ///
		3 "Consultancy" ///
		4 "Insurance" ///
		5 "Investment Bank" ///
		6 "Law Firm" ///
		7 "Stock Exchange" ///
		8 "global Accounting Firm" ///
		9 "global Central Bank" ///
		10 "global Consultancy" ///
		11 "global Insurance" ///
		12 "global Investment Bank" ///
		13 "global Law Firm" ///
		14 "global Stock Exchange" ///
		15 "global TS HQ"
	label values CompanyType mvalues

* Save data
	save "$temp/PrimePointsMerged", replace

 * Script ends
 
 
 